#note: the script must be run in sequentially to ensure that 
    #propert subsample is attached for each analysis

setwd("C:\\Documents and Settings\\John\\My Documents\\Server\\Tree_Submission\\JELS\\Replication")

rm(list = ls())     # remove all stored objects   
library(rpart) #library for running trees
library(arm) #for using "attach.all" function
library(foreign)    # allow for import of STATA datset

confession.data <-read.dta("confessions_data_replication.dta", convert.underscore = T)     
confession.data$dec <- factor(confession.data$panel.vote, levels=0:1, labels=c("Al", "Ex")) # let R know that DV is categorical
detach()
attach.all(confession.data) #attach foe court data

############################################
############################################
# Figure 10, Period 1: #1946-1966
############################################
############################################

keep <- year < 1967
confess1<-rpart(dec ~ circumstances + personal + procedure + magyes   
    + no.warn + attorney + mitigate.total + fruit2, subset = keep, cp = .0001,
    minsplit = 15,  method = "class")
printcp(confess1, digits = 2)#print cp table
post(confess1, file = "")

#Prune Tree - use .p to indicate pruned trees
confess1.p <- prune(confess1, cp = .04)
printcp(confess1.p, digits =2)
  #PLOT TREE
post(confess1.p, file = "")
#Resubstitution Predictions
re.confess1 <- ifelse((predict(confess1.p, type="vector")) == 2, 1, 0)#Resubsitution prediction
    #Recode prediction vector to 0,1 not 1,2
table(re.confess1,dec[keep])#

#Proportional reduction in error
#100 x [(% correctly predicted - % in modal category )/(100%-% in modal category.]
modal.cat.confess1 <- ifelse((length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]) > .5, #since modal cat is 0
   (length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]),                            #need ifelse command
   1 - (length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]))                       
cor.re.confess1 <- ifelse(re.confess1==panel.vote[keep],1,0)
pred.cor.re.confess1  <- sum(cor.re.confess1)/length(panel.vote[keep])
pred.cor.re.confess1 #Resubstitution prediction rate
pre.re.confess1 <- 100*((100*pred.cor.re.confess1 - 100*modal.cat.confess1)/(100-100*modal.cat.confess1))
pre.re.confess1 # Resubstituion PRE

#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 1000
pre.xv.confess1 <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))
for (i in 1:n.sims){
xv.confess1 <- ifelse((xpred.rpart(confess1.p, xval=10, cp = .02)) == 2, 1, 0) #cross-validated prediction; 
correct.xv.confess1 <- ifelse(xv.confess1==panel.vote[keep],1,0)
pred.cor.xv.confess1  <- sum(correct.xv.confess1)/length(panel.vote[keep])
pred.correct[i] <- pred.cor.xv.confess1
pre.xv.confess1[i] <- 100*((100*pred.cor.xv.confess1 - 100*modal.cat.confess1)/(100-100*modal.cat.confess1))
}
print(summary(pred.correct)) #Cross-validation prediction rate
print(summary(pre.xv.confess1))#Cross-validation PRE


############################################
############################################
# Figure 11, Period 2: #1967-1971
############################################
############################################

detach()
attach.all(confession.data)

keep <- year > 1966 & year < 1972
confess2 <- rpart(dec ~ circumstances + personal + procedure + magyes   
    + no.warn + attorney + mitigate.total + fruit2, subset = keep, cp = .0001,
    minsplit = 15,  method = "class")
printcp(confess2, digits = 2)
post(confess2, file = "")

#Prune Tree - use .p to indicate pruned trees
confess2.p <- prune(confess2, cp = .001)
printcp(confess2.p, digits = 2)
  #PLOT TREE
post(confess2.p, file = "")
#Resubstitution Predictions
re.confess2 <- ifelse((predict(confess2.p, type="vector")) == 2, 1, 0)#Resubsitution prediction
    #Recode prediction vector to 0,1 not 1,2
table(re.confess2,dec[keep])

#Proportional reduction in error
#100 x [(% correctly predicted - % in modal category )/(100%-% in modal category.]
modal.cat.confess2 <- ifelse((length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]) > .5, #since modal cat is 0
   (length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]),                            #need ifelse command
   1 - (length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]))                       
cor.re.confess2 <- ifelse(re.confess2==panel.vote[keep],1,0)
pred.cor.re.confess2  <- sum(cor.re.confess2)/length(panel.vote[keep])
pred.cor.re.confess2 #Resubstitution prediction rate
pre.re.confess2 <- 100*((100*pred.cor.re.confess2 - 100*modal.cat.confess2)/(100-100*modal.cat.confess2))
pre.re.confess2 #Resubstitution PRE

#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 1000
pre.xv.confess2 <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))
for (i in 1:n.sims){
xv.confess2 <- ifelse((xpred.rpart(confess2.p, xval=10, cp = .02)) == 2, 1, 0) #cross-validated prediction; 
correct.xv.confess2 <- ifelse(xv.confess2==panel.vote[keep],1,0)
pred.cor.xv.confess2  <- sum(correct.xv.confess2)/length(panel.vote[keep])
pred.correct[i] <- pred.cor.xv.confess2
pre.xv.confess2[i] <- 100*((100*pred.cor.xv.confess2 - 100*modal.cat.confess2)/(100-100*modal.cat.confess2))
}
print(summary(pred.correct)) #Cross-validation prediction rate
print(summary(pre.xv.confess2)) #Cross-validation PRE


############################################
############################################
# Figure 12, Period 3: #1967-1971
############################################
############################################
detach()
attach.all(confession.data)

keep <- year > 1971
confess3 <- rpart(dec ~ circumstances + personal + procedure + magyes   
    + no.warn + attorney + mitigate.total + fruit2, subset = keep, cp = .0001,
    minsplit = 15,  method = "class")
printcp(confess3, digits = 2)
post(confess3, file = "")


# Prune Tree -- use .p to indicate pruned trees
confess3.p <- prune(confess3, cp = .001)
printcp(confess3.p, digits = 2)
  #PLOT TREE
post(confess3.p, file = "")

#Resubstitution Predictions
re.confess3 <- ifelse((predict(confess3.p, type="vector")) == 2, 1, 0)#Resubsitution prediction
    #Recode prediction vector to 0,1 not 1,2
table(re.confess3,dec[keep])

#Proportional reduction in error
#100 x [(% correctly predicted - % in modal category )/(100%-% in modal category.]
modal.cat.confess3 <- ifelse((length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]) > .5, #since modal cat is 0
   (length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]),                            #need ifelse command
   1 - (length(panel.vote[keep])-sum(panel.vote[keep]))/length(panel.vote[keep]))                       
cor.re.confess3 <- ifelse(re.confess3==panel.vote[keep],1,0)
pred.cor.re.confess3  <- sum(cor.re.confess3)/length(panel.vote[keep])
pred.cor.re.confess3 #Resubstitution prediction rate
pre.re.confess3 <- 100*((100*pred.cor.re.confess3 - 100*modal.cat.confess3)/(100-100*modal.cat.confess3))
pre.re.confess3 #Resubstitution PRE

#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 1000
pre.xv.confess3 <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))
for (i in 1:n.sims){
xv.confess3 <- ifelse((xpred.rpart(confess3.p, xval=10, cp = .02)) == 2, 1, 0) #cross-validated prediction; 
correct.xv.confess3 <- ifelse(xv.confess3==panel.vote[keep],1,0)
pred.cor.xv.confess3  <- sum(correct.xv.confess3)/length(panel.vote[keep])
pred.correct[i] <- pred.cor.xv.confess3
pre.xv.confess3[i] <- 100*((100*pred.cor.xv.confess3 - 100*modal.cat.confess3)/(100-100*modal.cat.confess3))
}
print(summary(pred.correct)) #Cross-validation prediction rate
print(summary(pre.xv.confess3)) #Cross-validation PRE
